数据处理完整思路

1:载入需要的数据。原始数据中除抑郁问卷外,还包括焦虑问卷,因此只载入测量抑郁的问卷,及人口学信息。具体为DSRSC、CDI、PHQ-9、和DASS-21中测量抑郁的题目。

2:转换数据为数值型。

3:处理缺失值(实际没有缺失值)。

4:原始数据-1,用以方便计算检出率。原始数据的范围与量表正常的数据范围是不一致的。例如:DSRSC量表的计分范围正常应该为0-2,检出率为≥15。但咱们收到的原始数据计分范围就是1-3,也就是所有数值都+了1。因此-1后才能计算正确的检出率。

5:描述统计,计算了1、性别比例。2、年龄范围。3、由于年龄范围显示为0-29岁,因此筛选出来了6岁以下的数据(也可以改成其他岁数,我随便挑了6岁),看看是否需要处理这些数据。HCP: 不一定需要筛选,但需要向李医生确定一下:对于比较小的患者,比如6岁以下,是自己做答还是父母代答。

6:研究目的:检验问卷内合并的题目,以及问卷间测量同一个症状的题目的相关系数与其他题目是否有区别。对此先进行了数据质量检查。首先,使用原始数据进行了相关分析,发现问题:1、DSRSC问卷内相关有正有负。2、CDI问卷内相关有正有负。因此,计算了反向计分后的相关。发现问题:反向计分后的DSRSC问卷内相关都是正的,但是和其他三个问卷都是负相关。

由于目前的计分应该是有问题的,暂时没进一步分析,预计后续的处理。7:验证不同问卷检出率是否有所不同,因此进行检出率分析。

# 载入必要的包
library(here)
## here() starts at D:/心理健康测量/MH_CPL/5_Analysis/5_1_Depression/5_1_2 Measurement data analysis
library(bruceR)
## 
## bruceR (v2023.9)
## Broadly Useful Convenient and Efficient R functions
## 
## Packages also loaded:
## ✔ data.table ✔ emmeans
## ✔ dplyr      ✔ lmerTest
## ✔ tidyr      ✔ effectsize
## ✔ stringr    ✔ performance
## ✔ ggplot2    ✔ interactions
## 
## Main functions of `bruceR`:
## cc()             Describe()  TTEST()
## add()            Freq()      MANOVA()
## .mean()          Corr()      EMMEANS()
## set.wd()         Alpha()     PROCESS()
## import()         EFA()       model_summary()
## print_table()    CFA()       lavaan_summary()
## 
## For full functionality, please install all dependencies:
## install.packages("bruceR", dep=TRUE)
## 
## Online documentation:
## https://psychbruce.github.io/bruceR
## 
## To use this package in publications, please cite:
## Bao, H.-W.-S. (2023). bruceR: Broadly useful convenient and efficient R functions (Version 2023.9) [Computer software]. https://CRAN.R-project.org/package=bruceR
## 
## NEWS: A new version of bruceR (2024.6) is available (2024-06-13)!
## 
## ***** Please update *****
## install.packages("bruceR", dep=TRUE)
## 
## These packages are dependencies of `bruceR` but not installed:
## - cowplot, ggtext, see, lmtest, vars, phia, BayesFactor, GPArotation
## 
## ***** Install all dependencies *****
## install.packages("bruceR", dep=TRUE)
## 
## 载入程序包:'bruceR'
## The following object is masked _by_ 'package:data.table':
## 
##     %notin%
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ lubridate 1.9.3     ✔ tibble    3.2.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ data.table::between() masks dplyr::between()
## ✖ Matrix::expand()      masks tidyr::expand()
## ✖ dplyr::filter()       masks stats::filter()
## ✖ data.table::first()   masks dplyr::first()
## ✖ lubridate::hour()     masks data.table::hour()
## ✖ lubridate::isoweek()  masks data.table::isoweek()
## ✖ dplyr::lag()          masks stats::lag()
## ✖ data.table::last()    masks dplyr::last()
## ✖ lubridate::mday()     masks data.table::mday()
## ✖ lubridate::minute()   masks data.table::minute()
## ✖ lubridate::month()    masks data.table::month()
## ✖ Matrix::pack()        masks tidyr::pack()
## ✖ lubridate::quarter()  masks data.table::quarter()
## ✖ lubridate::second()   masks data.table::second()
## ✖ purrr::transpose()    masks data.table::transpose()
## ✖ Matrix::unpack()      masks tidyr::unpack()
## ✖ lubridate::wday()     masks data.table::wday()
## ✖ lubridate::week()     masks data.table::week()
## ✖ lubridate::yday()     masks data.table::yday()
## ✖ lubridate::year()     masks data.table::year()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
## Warning: 程序包'openxlsx'是用R版本4.4.1 来建造的
library(ggcorrplot)
## Warning: 程序包'ggcorrplot'是用R版本4.4.1 来建造的
#载入数据
raw_data <- bruceR::import(here::here("data", "Rawdata1.xlsx"))
## New names:
## • `量表ID` -> `量表ID...6`
## • `量表名称` -> `量表名称...7`
## • `量表ID` -> `量表ID...49`
## • `量表ID` -> `量表ID...69`
## • `量表ID` -> `量表ID...80`
## • `量表名称` -> `量表名称...81`
## • `量表ID` -> `量表ID...89`
## • `量表ID` -> `量表ID...98`
#筛选与抑郁有关的数据
selected_data <- raw_data %>%
  select(就诊卡号, 性别, 年龄, 
         DSRSC1:DSRSC18, 
         PHQ1:PHQ9, 
         DASS3, DASS5, DASS10, DASS13, DASS16, DASS17, DASS21, 
         CDI1:CDI27)

#转换数据类型
selected_data_numeric <- selected_data %>%
  mutate(性别 = factor(性别, levels = c("男", "女"), labels = c(1, 2))) %>%
  mutate(across(matches("性别|年龄|DSRSC|PHQ|DASS|CDI"), as.numeric))

#处理缺失值
clean_data <- selected_data_numeric %>%
  drop_na()

#原始数据-1,用以方便计算检出率
transformed_data <- clean_data %>%
  mutate(across(matches("DSRSC|PHQ|DASS|CDI"), ~ .x - 1))

#描述统计
# 计算性别比例
gender_proportion <- clean_data %>%
  summarise(
    male = sum(性别 == 1, na.rm = TRUE),
    female = sum(性别 == 2, na.rm = TRUE),
    total = n(),
    male_proportion = male / total * 100,
    female_proportion = female / total * 100
  )

# 打印性别比例
print(gender_proportion)
##   male female total male_proportion female_proportion
## 1 6754   6078 12832        52.63404          47.36596
# 计算年龄的范围
age_range <- clean_data %>%
  summarise(
    min_age = min(年龄, na.rm = TRUE),
    max_age = max(年龄, na.rm = TRUE)
  )

# 打印年龄范围
print(age_range)
##   min_age max_age
## 1       0      29
# 筛选出年龄在6岁以下的数据
under_6_data <- clean_data %>%
  filter(年龄 < 6)

# 打印出年龄在6岁以下的数据
print(under_6_data)
##      就诊卡号 性别 年龄 DSRSC1 DSRSC2 DSRSC3 DSRSC4 DSRSC5 DSRSC6 DSRSC7 DSRSC8
## 1  0012712415    1    5      2      1      3      1      3      1      2      2
## 2  0012768596    1    2      2      2      1      1      3      2      3      2
## 3  0012808861    1    0      1      1      2      1      3      3      1      1
## 4  0012858882    2    0      2      3      2      2      3      2      2      2
## 5  1215857130    1    1      2      1      2      2      3      3      2      1
## 6  0012939870    1    5      1      2      3      1      2      3      1      2
## 7  0012989858    2    2      3      1      2      1      2      3      1      2
## 8  0009459552    1    5      1      1      2      1      3      2      1      1
## 9  0009491458    1    5      1      1      2      1      3      2      2      2
## 10 0012680982    2    0      1      1      2      1      3      2      1      1
## 11 0013030057    1    3      1      1      2      1      3      3      1      1
## 12 0011959389    2    5      1      1      3      1      3      2      2      2
## 13 0013497374    1    0      3      1      3      2      3      3      3      2
## 14 0008993491    2    0      3      3      1      1      3      1      1      2
## 15 0013593099    2    5      1      1      3      1      3      3      1      1
## 16 0013615049    1    2      2      2      3      2      3      3      1      2
## 17 0013784963    2    3      1      1      3      3      3      1      3      2
## 18 0013786955    2    5      1      2      3      1      3      1      1      2
## 19 0013939368    1    0      2      2      1      2      3      3      2      1
## 20 0013943658    1    5      2      1      3      1      3      3      1      1
## 21 0013981579    1    5      2      3      3      1      3      3      2      2
##    DSRSC9 DSRSC10 DSRSC11 DSRSC12 DSRSC13 DSRSC14 DSRSC15 DSRSC16 DSRSC17
## 1       2       3       2       2       1       2       3       1       3
## 2       2       3       2       2       2       2       3       2       2
## 3       1       3       1       1       1       3       3       1       3
## 4       2       2       2       3       2       2       3       2       2
## 5       3       3       2       2       2       3       2       1       3
## 6       1       3       3       1       1       2       3       1       3
## 7       2       2       3       2       3       1       3       2       3
## 8       1       3       1       2       1       3       2       1       3
## 9       2       3       2       1       1       2       1       1       3
## 10      1       3       2       2       1       3       3       1       2
## 11      2       3       1       1       1       2       2       1       3
## 12      3       3       1       2       1       3       3       1       3
## 13      1       3       1       2       2       1       3       3       3
## 14      3       1       3       3       3       2       3       3       3
## 15      1       3       1       1       1       3       3       1       3
## 16      2       1       2       2       3       2       3       3       3
## 17      3       2       3       2       2       3       3       2       2
## 18      1       3       1       1       1       3       3       1       3
## 19      3       2       3       2       1       2       2       1       2
## 20      2       3       2       1       1       2       2       1       3
## 21      2       2       2       2       3       3       2       2       2
##    DSRSC18 PHQ1 PHQ2 PHQ3 PHQ4 PHQ5 PHQ6 PHQ7 PHQ8 PHQ9 DASS3 DASS5 DASS10
## 1        2    1    2    1    1    4    1    4    1    1     1     3      1
## 2        2    4    3    3    2    1    2    4    2    2     2     2      2
## 3        2    1    1    4    1    1    1    1    1    1     1     1      1
## 4        1    2    2    4    4    1    2    1    4    2     4     4      4
## 5        3    4    2    4    1    4    3    4    4    1     3     4      2
## 6        2    4    2    4    1    3    3    3    1    1     3     1      1
## 7        1    4    2    4    4    4    4    4    4    4     4     4      4
## 8        2    2    2    4    1    1    1    2    3    1     1     4      1
## 9        3    1    4    4    1    1    1    4    2    1     2     3      1
## 10       2    1    2    1    1    1    1    1    1    1     1     1      1
## 11       3    2    2    2    1    1    2    1    2    1     1     1      1
## 12       3    1    2    4    2    2    1    1    1    1     1     1      1
## 13       3    1    1    4    1    4    3    1    1    1     2     2      2
## 14       3    2    2    4    4    2    1    1    1    4     1     1      1
## 15       3    2    1    2    2    2    1    1    1    1     2     1      1
## 16       2    1    1    1    1    1    1    1    1    1     1     2      1
## 17       2    4    2    1    4    4    4    3    2    2     2     4      4
## 18       3    1    2    3    2    4    1    1    2    1     1     2      1
## 19       1    2    2    4    2    2    2    1    4    2     1     4      3
## 20       2    1    2    2    1    2    1    1    4    1     2     3      1
## 21       2    2    2    3    2    4    1    2    3    2     2     4      2
##    DASS13 DASS16 DASS17 DASS21 CDI1 CDI2 CDI3 CDI4 CDI5 CDI6 CDI7 CDI8 CDI9
## 1       1      2      1      1    3    2    3    2    1    1    3    3    1
## 2       3      2      2      1    3    1    2    2    2    3    2    2    2
## 3       1      1      1      1    1    2    2    1    1    3    3    3    1
## 4       2      2      1      2    1    1    3    2    2    1    2    3    2
## 5       2      3      1      1    2    2    3    2    1    1    2    2    2
## 6       1      1      1      1    1    2    2    1    1    1    3    3    2
## 7       2      4      1      4    1    1    3    2    1    3    2    2    2
## 8       2      1      1      1    2    2    2    2    1    1    3    3    1
## 9       1      1      1      1    1    2    2    2    1    2    3    3    1
## 10      1      2      1      1    1    2    2    2    3    1    3    2    1
## 11      1      1      1      1    2    3    2    1    1    1    3    3    1
## 12      2      1      1      1    1    3    1    2    1    1    3    3    1
## 13      1      1      1      1    1    1    1    2    3    2    3    3    1
## 14      1      1      1      2    1    2    2    2    3    1    2    1    2
## 15      1      1      1      1    1    2    2    1    3    1    3    3    1
## 16      1      1      1      1    1    2    2    2    3    1    3    3    1
## 17      1      3      4      4    1    1    3    2    3    1    2    2    2
## 18      1      1      1      1    1    2    2    1    3    1    3    3    1
## 19      2      1      2      2    1    1    3    2    1    1    2    1    2
## 20      1      2      1      1    1    2    1    1    3    1    3    3    1
## 21      3      3      1      2    3    1    2    2    2    2    2    3    2
##    CDI10 CDI11 CDI12 CDI13 CDI14 CDI15 CDI16 CDI17 CDI18 CDI19 CDI20 CDI21
## 1      3     3     1     2     1     1     3     1     2     1     1     1
## 2      1     2     3     2     2     2     2     2     2     2     2     2
## 3      3     3     1     3     1     3     3     1     3     2     1     3
## 4      3     3     2     1     1     2     2     2     2     2     1     1
## 5      2     2     2     2     1     2     3     1     3     1     2     1
## 6      3     3     1     3     1     1     2     1     2     1     1     1
## 7      1     1     1     2     3     2     3     3     3     3     2     3
## 8      3     3     1     3     1     1     1     1     3     1     2     1
## 9      3     3     1     3     1     3     3     1     2     1     2     2
## 10     3     3     3     3     1     3     3     1     3     1     2     3
## 11     3     3     2     1     1     1     3     1     3     1     2     3
## 12     3     3     1     3     1     3     3     1     3     1     1     1
## 13     3     3     2     2     1     2     1     1     3     1     2     2
## 14     2     3     1     3     1     3     2     3     2     1     1     2
## 15     3     3     1     3     1     3     3     1     3     1     1     2
## 16     3     3     1     3     1     3     3     1     2     1     1     2
## 17     3     3     2     1     2     3     3     2     2     1     1     1
## 18     3     3     1     3     1     3     3     1     2     1     1     3
## 19     3     3     1     2     2     2     2     1     3     2     1     2
## 20     3     3     1     2     1     1     3     1     2     1     1     2
## 21     3     1     2     1     1     1     3     2     1     1     2     1
##    CDI22 CDI23 CDI24 CDI25 CDI26 CDI27
## 1      2     1     2     3     2     2
## 2      2     3     1     2     1     2
## 3      1     2     2     3     2     1
## 4      3     3     1     1     2     3
## 5      2     3     1     3     2     3
## 6      1     1     3     3     1     1
## 7      1     1     2     3     2     1
## 8      2     1     3     3     2     2
## 9      2     1     3     3     2     3
## 10     1     2     2     3     1     1
## 11     1     1     3     3     1     1
## 12     2     1     2     3     2     2
## 13     2     2     1     3     2     2
## 14     2     1     1     3     2     2
## 15     2     1     3     3     1     1
## 16     1     2     2     2     1     1
## 17     2     3     1     3     2     1
## 18     1     1     3     3     1     1
## 19     2     3     2     3     2     3
## 20     1     1     1     3     2     2
## 21     2     2     2     2     3     2
#反向计分前的相关分析
correlation_matrix <- transformed_data %>%
  select(matches("DSRSC|PHQ|DASS|CDI")) %>%
  cor()

#导出
output_file <- here::here("output", "correlation_matrix.xlsx")
write.xlsx(correlation_matrix, output_file)

#可视化
p <- ggcorrplot(correlation_matrix, lab = TRUE, 
                method = "circle", 
                outline.color = "white", 
                colors = c("blue", "white", "red"), 
                ggtheme = ggplot2::theme_minimal(base_family = "sans"), 
                title = "未反向计分的相关系数") +
  theme_minimal(base_family = "sans") + 
  theme(
    text = element_text(size = 20, color = "black"), 
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 14), 
    axis.text.y = element_text(size = 14), 
    axis.title = element_text(size = 22), 
    plot.background = element_rect(fill = "gray90"), 
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    panel.background = element_rect(fill = "gray90", color = NA), 
    legend.position = "bottom", 
    legend.key.width = unit(4, "cm"), 
    legend.key.height = unit(1, "cm")
  ) +
  scale_size_continuous(range = c(5, 12)) +
  labs(x = "", y = "")
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
#保存图片
ggsave(here::here("output", "correlation_heatmap.png"), 
       plot = p, width = 32, height = 28, dpi = 300)
未反向计分的相关系数
未反向计分的相关系数
# 反向计分后的相关分析
# 定义需要反向计分的变量
reverse_vars <- c("DSRSC1", "DSRSC2", "DSRSC4", "DSRSC7", "DSRSC8", "DSRSC9", "DSRSC11", "DSRSC12", "DSRSC13", "DSRSC16",
                  "CDI2", "CDI5", "CDI7", "CDI8", "CDI10", "CDI11", "CDI13", "CDI15", "CDI16", "CDI18", "CDI21", "CDI24", "CDI25")

# 进行反向计分
reverse_scored_data <- transformed_data %>%
  mutate_at(vars(one_of(reverse_vars)), ~2 - .x)
# 进行反向计分后的相关分析
reverse_scored_correlation_matrix <- reverse_scored_data %>%
  select(matches("DSRSC|PHQ|DASS|CDI")) %>%
  cor()

# 导出反向计分后的相关矩阵
output_file_reverse <- here::here("output", "reverse_scored_correlation_matrix.xlsx")
write.xlsx(reverse_scored_correlation_matrix, output_file_reverse)

# 可视化反向计分后的相关矩阵
p_reverse <- ggcorrplot(reverse_scored_correlation_matrix, lab = TRUE, 
                        method = "circle", 
                        outline.color = "white", 
                        colors = c("blue", "white", "red"), 
                        ggtheme = ggplot2::theme_minimal(base_family = "sans"), 
                        title = "反向计分后的相关系数") +
  theme_minimal(base_family = "sans") + 
  theme(
    text = element_text(size = 20, color = "black"), 
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, size = 14), 
    axis.text.y = element_text(size = 14), 
    axis.title = element_text(size = 22), 
    plot.background = element_rect(fill = "gray90"), 
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(), 
    panel.background = element_rect(fill = "gray90", color = NA), 
    legend.position = "bottom", 
    legend.key.width = unit(4, "cm"), 
    legend.key.height = unit(1, "cm")
  ) +
  scale_size_continuous(range = c(5, 12)) +
  labs(x = "", y = "")
## Scale for size is already present.
## Adding another scale for size, which will replace the existing scale.
# 保存反向计分后的相关系数热图
ggsave(here::here("output", "reverse_scored_correlation_heatmap.png"), 
       plot = p_reverse, width = 32, height = 28, dpi = 300)
Reverse Scored Correlation Heatmap
Reverse Scored Correlation Heatmap